# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the dataset. The path is relative, so 'sales.csv' must be in the
# current working directory when the script runs.
df = pd.read_csv('sales.csv')
# Apply seaborn's "whitegrid" style to every plot drawn after this point.
sns.set(style="whitegrid")
# Function to display different charts
def display_all_charts(df):
    """Render fifteen exploratory charts for the sales data, one after another.

    Every chart is drawn on its own figure and displayed with ``plt.show()``.
    Relies on the module-level ``pd``, ``plt`` and ``sns`` imports.

    Args:
        df: DataFrame providing the columns read below: ``Region``,
            ``Average_Order_Value``, ``Purchase_Frequency``,
            ``Churn_Probability``, ``Lifetime_Value``,
            ``Time_Between_Purchases``, ``Retention_Strategy``,
            ``Launch_Date``, ``Season``, ``Most_Frequent_Category`` and
            ``Product_ID``. The frame is not modified.

    Returns:
        None. The charts are the only output.
    """
    # 1. Countplot: Distribution of Regions
    plt.figure(figsize=(10, 6))
    sns.countplot(x='Region', data=df, palette='Set1')
    plt.title('Distribution of Regions')
    plt.xlabel('Region')
    plt.ylabel('Count')
    plt.show()

    # 2. Barplot: Average Order Value by Region
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Region', y='Average_Order_Value', data=df, palette='Blues')
    plt.title('Average Order Value by Region')
    plt.xlabel('Region')
    plt.ylabel('Average Order Value (USD)')
    plt.show()

    # 3. Boxplot: Distribution of Purchase Frequency by Region
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Region', y='Purchase_Frequency', data=df, palette='Set2')
    plt.title('Distribution of Purchase Frequency by Region')
    plt.xlabel('Region')
    plt.ylabel('Purchase Frequency')
    plt.show()

    # 4. Heatmap: Correlation Matrix between Variables
    plt.figure(figsize=(10, 6))
    # Correlate numeric columns only: the frame also holds text columns such
    # as Region, and recent pandas raises instead of silently dropping them.
    corr_matrix = df.select_dtypes(include='number').corr()
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
    plt.title('Correlation Matrix')
    plt.show()

    # 5. Violin Plot: Churn Probability by Region
    plt.figure(figsize=(10, 6))
    sns.violinplot(x='Region', y='Churn_Probability', data=df, palette='muted')
    plt.title('Churn Probability by Region')
    plt.xlabel('Region')
    plt.ylabel('Churn Probability')
    plt.show()

    # 6. Pairplot: Relationships between Numeric Variables
    # pairplot creates its own figure, so no plt.figure() call is needed.
    sns.pairplot(df[['Lifetime_Value', 'Average_Order_Value', 'Purchase_Frequency', 'Time_Between_Purchases']])
    plt.suptitle('Pairplot of Selected Numeric Variables', y=1.02)
    plt.show()

    # 7. Scatter Plot: Lifetime Value vs Average Order Value
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x='Lifetime_Value', y='Average_Order_Value', data=df, hue='Region', palette='Set2')
    plt.title('Lifetime Value vs Average Order Value')
    plt.xlabel('Lifetime Value (USD)')
    plt.ylabel('Average Order Value (USD)')
    plt.show()

    # Retention strategy counts feed both the bar chart (8) and the pie
    # chart (13), so compute them once.
    retention_counts = df['Retention_Strategy'].value_counts()

    # 8. Barplot: Retention Strategy Count
    plt.figure(figsize=(10, 6))
    sns.barplot(x=retention_counts.index, y=retention_counts.values, palette='Set3')
    plt.title('Count of Retention Strategies')
    plt.xlabel('Retention Strategy')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.show()

    # 9. Line Plot: Average Order Value over Time (by Launch Date)
    # Parse the dates into a local Series rather than overwriting the
    # caller's column, so the input frame is left untouched.
    launch_dates = pd.to_datetime(df['Launch_Date'])
    plt.figure(figsize=(10, 6))
    sns.lineplot(x=launch_dates, y=df['Average_Order_Value'], marker='o')
    plt.title('Average Order Value over Time')
    plt.xlabel('Launch Date')
    plt.ylabel('Average Order Value (USD)')
    plt.xticks(rotation=45)
    plt.show()

    # 10. Histogram: Distribution of Lifetime Value
    plt.figure(figsize=(10, 6))
    # histplot supersedes the deprecated distplot; fall back to distplot only
    # on seaborn releases that predate histplot.
    if hasattr(sns, 'histplot'):
        sns.histplot(df['Lifetime_Value'], bins=30, color='skyblue', kde=True)
    else:
        sns.distplot(df['Lifetime_Value'], bins=30, color='skyblue', kde=True)
    plt.title('Distribution of Lifetime Value')
    plt.xlabel('Lifetime Value (USD)')
    plt.ylabel('Frequency')
    plt.show()

    # 11. Boxplot: Lifetime Value by Season
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Season', y='Lifetime_Value', data=df, palette='coolwarm')
    plt.title('Lifetime Value by Season')
    plt.xlabel('Season')
    plt.ylabel('Lifetime Value (USD)')
    plt.show()

    # 12. Barplot: Average Time Between Purchases by Product Category
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Most_Frequent_Category', y='Time_Between_Purchases', data=df, palette='muted')
    plt.title('Average Time Between Purchases by Product Category')
    plt.xlabel('Product Category')
    plt.ylabel('Average Time Between Purchases (days)')
    plt.xticks(rotation=45)
    plt.show()

    # 13. Pie Chart: Distribution of Customer Retention Strategies
    plt.figure(figsize=(8, 8))
    retention_counts.plot(kind='pie', autopct='%1.1f%%', startangle=90, colors=sns.color_palette('Set3'))
    plt.title('Customer Retention Strategies')
    plt.ylabel('')  # drop the Series name pandas would otherwise print as a y-label
    plt.show()

    # 14. Line Plot: Cumulative Lifetime Value by Product
    # "Cumulative" here means the per-product total (a groupby sum), not a
    # running sum across products.
    value_by_product = df.groupby('Product_ID')['Lifetime_Value'].sum()
    plt.figure(figsize=(10, 6))
    sns.lineplot(x=value_by_product.index, y=value_by_product.values, color='orange', marker='o')
    plt.title('Cumulative Lifetime Value by Product')
    plt.xlabel('Product ID')
    plt.ylabel('Cumulative Lifetime Value (USD)')
    plt.show()

    # 15. FacetGrid: Time Between Purchases by Region (with scatter plot)
    # One panel per region; FacetGrid manages its own figure.
    g = sns.FacetGrid(df, col="Region", height=6, aspect=1.5)
    g.map(sns.scatterplot, 'Lifetime_Value', 'Time_Between_Purchases', alpha=0.7)
    g.set_axis_labels('Lifetime Value (USD)', 'Time Between Purchases (days)')
    g.set_titles("{col_name}")
    plt.show()
# Run the chart suite defined directly above on the loaded data.
display_all_charts(df)
# NOTE(review): the imports, CSV load and style setup below repeat the top of
# this file — presumably a second standalone script was appended here.
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Re-read the dataset from disk. The path is relative, so 'sales.csv' must be
# in the current working directory.
df = pd.read_csv('sales.csv')
# Apply seaborn's "whitegrid" style to every plot drawn after this point.
sns.set(style="whitegrid")
# Function to display different charts
def display_all_charts(df):
    """Render ten overview charts for the sales data, one after another.

    Every chart is drawn on its own figure and displayed with ``plt.show()``.
    Relies on the module-level ``pd``, ``plt`` and ``sns`` imports.

    Args:
        df: DataFrame providing the columns read below: ``Launch_Date``,
            ``Average_Order_Value``, ``Region``, ``Purchase_Frequency``,
            ``Lifetime_Value``, ``Time_Between_Purchases``,
            ``Retention_Strategy``, ``Churn_Probability`` and ``Product_ID``.
            The frame is not modified.

    Returns:
        None. The charts are the only output.
    """
    # 1. Line Chart (e.g., Average Order Value over Time)
    plt.figure(figsize=(10, 6))
    # Group on parsed dates so the x-axis is chronological; grouping on the
    # raw strings would order the points lexically.
    launch_dates = pd.to_datetime(df['Launch_Date'])
    df.groupby(launch_dates)['Average_Order_Value'].mean().plot(kind='line')
    plt.title('Average Order Value over Time')
    plt.xlabel('Launch Date')
    plt.ylabel('Average Order Value (USD)')
    plt.xticks(rotation=45)
    plt.show()

    # 2. Bar Chart (e.g., Purchase Frequency by Region)
    # Bars show the summed purchase frequency of each region.
    plt.figure(figsize=(10, 6))
    df.groupby('Region')['Purchase_Frequency'].sum().plot(kind='bar', color='skyblue')
    plt.title('Purchase Frequency by Region')
    plt.xlabel('Region')
    plt.ylabel('Purchase Frequency')
    plt.xticks(rotation=45)
    plt.show()

    # 3. Scatter Plot (e.g., Lifetime Value vs. Average Order Value)
    plt.figure(figsize=(10, 6))
    plt.scatter(df['Lifetime_Value'], df['Average_Order_Value'], alpha=0.5, color='purple')
    plt.title('Lifetime Value vs. Average Order Value')
    plt.xlabel('Lifetime Value (USD)')
    plt.ylabel('Average Order Value (USD)')
    plt.show()

    # 4. Histogram (e.g., Distribution of Time Between Purchases)
    plt.figure(figsize=(10, 6))
    df['Time_Between_Purchases'].plot(kind='hist', bins=50, color='orange', edgecolor='black')
    plt.title('Distribution of Time Between Purchases')
    plt.xlabel('Time Between Purchases (days)')
    plt.ylabel('Frequency')
    plt.show()

    # 5. Boxplot (e.g., Distribution of Lifetime Value by Region)
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Region', y='Lifetime_Value', data=df, palette='Set2')
    plt.title('Distribution of Lifetime Value by Region')
    plt.xlabel('Region')
    plt.ylabel('Lifetime Value (USD)')
    plt.show()

    # 6. Pie Chart (e.g., Proportion of Customer Retention Strategies)
    plt.figure(figsize=(8, 8))
    retention_counts = df['Retention_Strategy'].value_counts()
    retention_counts.plot(kind='pie', autopct='%1.1f%%', startangle=90, colors=sns.color_palette('Set3'))
    plt.title('Customer Retention Strategies')
    plt.ylabel('')  # drop the Series name pandas would otherwise print as a y-label
    plt.show()

    # 7. Heatmap (e.g., Correlation Matrix between different numeric variables)
    plt.figure(figsize=(10, 6))
    # Correlate numeric columns only: the frame also holds text columns such
    # as Region, and recent pandas raises instead of silently dropping them.
    corr_matrix = df.select_dtypes(include='number').corr()
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
    plt.title('Correlation Matrix')
    plt.show()

    # 8. Violin Plot (e.g., Churn Probability by Region)
    plt.figure(figsize=(10, 6))
    sns.violinplot(x='Region', y='Churn_Probability', data=df, palette='muted')
    plt.title('Churn Probability by Region')
    plt.xlabel('Region')
    plt.ylabel('Churn Probability')
    plt.show()

    # 9. Pairplot (e.g., Visualizing relationships between numeric variables)
    # pairplot creates its own figure, so no plt.figure() call is needed.
    sns.pairplot(df[['Lifetime_Value', 'Average_Order_Value', 'Purchase_Frequency', 'Time_Between_Purchases']])
    plt.suptitle('Pairplot of Selected Numeric Variables', y=1.02)
    plt.show()

    # 10. Area Plot (e.g., Cumulative Total Lifetime Value by Product)
    # "Cumulative" here means the per-product total (a groupby sum), not a
    # running sum across products. Open an explicit figure like every other
    # chart so the plot never lands on a leftover axes.
    plt.figure(figsize=(10, 6))
    value_by_product = df.groupby('Product_ID')['Lifetime_Value'].sum()
    value_by_product.plot(kind='area', color='lightcoral', alpha=0.6, ax=plt.gca())
    plt.title('Cumulative Lifetime Value by Product')
    plt.xlabel('Product ID')
    plt.ylabel('Cumulative Lifetime Value (USD)')
    plt.show()
# Run the chart suite defined directly above on the loaded data.
display_all_charts(df)
# NOTE(review): the imports, CSV load and style setup below repeat the top of
# this file — presumably a third standalone script was appended here.
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Re-read the dataset from disk. The path is relative, so 'sales.csv' must be
# in the current working directory.
df = pd.read_csv('sales.csv')
# Apply seaborn's "whitegrid" style to every plot drawn after this point.
sns.set(style="whitegrid")
# Function to display different charts
def display_all_charts(df):
    """Render nine comparison charts for the sales data, one after another.

    Every chart is drawn on its own figure and displayed with ``plt.show()``.
    Relies on the module-level ``pd``, ``plt`` and ``sns`` imports.

    Args:
        df: DataFrame providing the columns read below: ``Region``,
            ``Retention_Strategy``, ``Time_Between_Purchases``,
            ``Average_Order_Value``, ``Churn_Probability``,
            ``Lifetime_Value``, ``Purchase_Frequency`` and ``Season``.
            The frame is not modified.

    Returns:
        None. The charts are the only output.
    """
    import math  # local import: only the radar chart helper needs it

    # 1. Stacked Bar Plot: Purchase Frequency by Region and Retention Strategy
    # NOTE(review): crosstab counts rows per (Region, Retention_Strategy)
    # pair; it does not aggregate the Purchase_Frequency column, so the title
    # and y-label may overstate what is shown — confirm the intended metric.
    plt.figure(figsize=(10, 6))
    pd.crosstab(df['Region'], df['Retention_Strategy']).plot(kind='bar', stacked=True, colormap='Paired', ax=plt.gca())
    plt.title('Purchase Frequency by Region and Retention Strategy')
    plt.xlabel('Region')
    plt.ylabel('Purchase Frequency')
    plt.xticks(rotation=45)
    plt.show()

    # 2. Scatter Plot: Time Between Purchases vs Average Order Value
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x='Time_Between_Purchases', y='Average_Order_Value', data=df, hue='Region', palette='viridis')
    plt.title('Time Between Purchases vs Average Order Value')
    plt.xlabel('Time Between Purchases (days)')
    plt.ylabel('Average Order Value (USD)')
    plt.show()

    # 3. Heatmap: Correlation of Numeric Variables (whole dataset, not per region)
    plt.figure(figsize=(10, 6))
    # 'number' selects every numeric dtype, not just float64/int64, so
    # narrower numeric columns are not silently left out.
    corr_matrix = df.select_dtypes(include='number').corr()
    sns.heatmap(corr_matrix, annot=True, cmap='YlGnBu', linewidths=0.5)
    plt.title('Correlation Matrix of Numeric Variables')
    plt.show()

    # 4. Bar Plot: Average Churn Probability by Region
    plt.figure(figsize=(10, 6))
    avg_churn_by_region = df.groupby('Region')['Churn_Probability'].mean()
    avg_churn_by_region.plot(kind='bar', color='lightblue', ax=plt.gca())
    plt.title('Average Churn Probability by Region')
    plt.xlabel('Region')
    plt.ylabel('Average Churn Probability')
    plt.show()

    # 5. Stripplot: Distribution of Time Between Purchases by Region
    plt.figure(figsize=(10, 6))
    sns.stripplot(x='Region', y='Time_Between_Purchases', data=df, jitter=True, palette='Set2')
    plt.title('Distribution of Time Between Purchases by Region')
    plt.xlabel('Region')
    plt.ylabel('Time Between Purchases (days)')
    plt.show()

    # 6. Pairplot: Lifetime Value vs Purchase Frequency vs Churn Probability
    # pairplot creates its own figure, so no plt.figure() call is needed.
    sns.pairplot(df[['Lifetime_Value', 'Purchase_Frequency', 'Churn_Probability']])
    plt.suptitle('Pairplot of Lifetime Value, Purchase Frequency, and Churn Probability', y=1.02)
    plt.show()

    # 7. Radar Chart: Average Order Value, Lifetime Value, and Churn Probability
    def radar_chart(values, labels, title):
        """Draw and show a filled polar chart with one spoke per label.

        Args:
            values: Sequence of numbers, one per label. Not modified.
            labels: Spoke labels, in the same order as ``values``.
            title: Title placed above the chart.
        """
        spoke_count = len(labels)
        angles = [n / spoke_count * 2 * math.pi for n in range(spoke_count)]
        # Repeat the first point at the end so the outline closes. Build new
        # lists instead of using +=, which would extend the caller's list.
        closed_values = list(values) + list(values[:1])
        closed_angles = angles + angles[:1]
        fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
        ax.fill(closed_angles, closed_values, color='orange', alpha=0.25)
        ax.plot(closed_angles, closed_values, color='orange', linewidth=2)
        ax.set_yticklabels([])
        ax.set_xticks(angles)
        ax.set_xticklabels(labels, fontsize=12)
        ax.set_title(title, size=14)
        plt.show()

    # NOTE(review): these are dataset-wide means, not per-region values, and
    # the three metrics share one radial axis without rescaling — confirm the
    # 'Region Comparison' title and the missing normalisation are intended.
    radar_chart(
        [df['Average_Order_Value'].mean(), df['Lifetime_Value'].mean(), df['Churn_Probability'].mean()],
        ['Avg Order Value', 'Lifetime Value', 'Churn Probability'],
        'Region Comparison'
    )

    # 8. Histogram: Distribution of Purchase Frequency
    plt.figure(figsize=(10, 6))
    df['Purchase_Frequency'].plot(kind='hist', bins=30, color='green', edgecolor='black', ax=plt.gca())
    plt.title('Distribution of Purchase Frequency')
    plt.xlabel('Purchase Frequency')
    plt.ylabel('Frequency')
    plt.show()

    # 9. Boxplot: Time Between Purchases by Season
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Season', y='Time_Between_Purchases', data=df, palette='muted')
    plt.title('Time Between Purchases by Season')
    plt.xlabel('Season')
    plt.ylabel('Time Between Purchases (days)')
    plt.show()
# Run the chart suite defined directly above on the loaded data.
display_all_charts(df)